#!/bin/bash
# File Name   : mongo_inc_backup.sh
# Author      : moshan
# Mail        : mo_shan@yeah.net
# Created Time: 2019-08-23 15:42:08
# Function    : 
#########################################################################
#
# This file is used by cron to back up the data of the oplog collection, which is part of the local DB.
# The oplog (operations log) is a special capped collection that keeps a rolling record of all operations
# that modify the data stored in your databases. All replica set members contain a copy of the oplog,
# in the local.oplog.rs collection, which allows them to maintain the current state of the database.
# Each operation in the oplog is idempotent. That is, oplog operations produce the same results
# whether applied once or multiple times to the target dataset.
#
# We back up the collection periodically so the DB can be restored in case of a DB disaster.
# The version is defined V.001
# Version   ModifyTime                ModifyBy              Desc
# Ver001    2018-11-06 17:00         xuchangpei             Create the Scripts File
#
#
#### Key parameters: program path, account, password, instance port ###
# Path of the mongo shell binary used to query the server.
command_linebin="/data/mongodb/base/bin/mongo"
# Directory holding the MongoDB binaries (mongodump is taken from here).
mongo_path="/data/mongodb/base/bin"
# NOTE(review): credentials are hardcoded in plain text and are passed on the
# command line below, so they are visible in `ps` output — consider moving
# them to a restricted config file or environment variables.
username="root"
password="123456"
# Port of the mongod instance to back up; also used in backup/log dir names.
port="27017"
#
# This file is used by cron to Backup the data of oplog collection,the collection is part of local DB.
# The oplog (operations log) is a special capped collection that keeps a rolling record of all operations 
# that modify the data stored in your databases.All replica set members contain a copy of the oplog, 
# in the local.oplog.rs collection, which allows them to maintain the current state of the database.
# Each operation in the oplog is idempotent. That is, oplog operations produce the same results 
# whether applied once or multiple times to the target dataset.
#
# We backup the collections by periodicity to restore the DB  in case of  DB disaster 
# The version is defined V.001
# Version   ModifyTime                ModifyBy              Desc
# Ver001    2018-11-06 17:00         xuchangpei             Create the Scripts File
#
#
# Backup data directory and log directory for this instance (keyed by port).
bkdatapath="/data/mongodb_back/mongodboplog_back/mongo$port"
bklogpath="/data/mongodb_back/mongodboplog_back/log/$port"

# mkdir -p is idempotent and creates missing parents, so the previous
# "[ ! -d … ] && mkdir -p" guards were redundant.
mkdir -p "$bkdatapath"
mkdir -p "$bklogpath"

####comments end ##

# Daily log file, named by date (YYYYMMDD); all runs of the same day append.
logfilename=$(date -d today +"%Y%m%d")

echo "===================================Message --=MongoDB 端口为" "$port" "的差异备份开始，开始时间为" "$(date -d today +"%Y%m%d%H%M%S")" >> "$bklogpath/$logfilename.log"

# End of the backup window = now, as epoch seconds.
ParamBakEndDate=$(date +%s)
echo "Message --本次备份时间参数中的结束时间为：" "$ParamBakEndDate" >> "$bklogpath/$logfilename.log"

# Window length: 65 minutes.  With an hourly cron schedule the 5-minute
# overlap guarantees consecutive dumps overlap, so no oplog entry is missed.
# (Shell arithmetic replaces the deprecated external `expr`.)
DiffTime=$((65 * 60))

echo "Message --备份设置的间隔时间为：" "$DiffTime" >> "$bklogpath/$logfilename.log"

ParamBakStartDate=$((ParamBakEndDate - DiffTime))
echo "Message --本次备份时间参数中的开始时间为：" "$ParamBakStartDate" >> "$bklogpath/$logfilename.log"

# DiffTime above is checked BEFORE the dump: the oldest oplog entry must be at
# least 65 minutes old.  We must also check AFTER the dump, because heavy
# write traffic during the export can overwrite not-yet-exported entries in
# the capped oplog.  The post-dump requirement is slightly looser: 61 minutes.
# Example: for a dump at 08:00, the oldest entry must predate 06:00 before the
# dump and still predate 06:59 after it.
DiffTime=$((61 * 60))
ParamAfterBakRequestStartDate=$((ParamBakEndDate - DiffTime))
echo "Message --为保证备份的连续性,本次备份后,oplog中的开始时间需小于：" "$ParamAfterBakRequestStartDate" >> "$bklogpath/$logfilename.log"
##### end 

# Timestamp embedded in this run's dump directory name.
bkfilename=$(date -d today +"%Y%m%d%H%M%S")

#### comments1 start: capture the oldest oplog entry time so we can verify the
#### requested dump window lies fully inside the oplog before exporting ####

# mongo shell invocation; intentionally left unquoted where used so the string
# splits into command + arguments.  NOTE(review): -p on the command line makes
# the password visible in `ps`.
command_line="${command_linebin} localhost:$port/admin -u$username -p$password"

# db.printReplicationInfo() prints, among other lines,
# "oplog first event time:  <human-readable date>".
opmes=$(/bin/echo "db.printReplicationInfo()" | $command_line --quiet)

# Flatten the multi-line output into one line in a temp file for extraction.
echo $opmes > opdoctime$port.tmplog

opbktmplogfile=opdoctime$port.tmplog

# Extract the date between "oplog first event time: " and the trailing
# timezone token (CST or GMT, depending on the server's locale).
opstartmes=$(grep "oplog first event time" $opbktmplogfile | awk -F 'CST' '{print $1}' | awk -F 'oplog first event time: '  '{print $2}' | awk -F ' GMT' '{print $1}'  )

echo "Message --oplog集合记录的开始时间为："$opstartmes >> "$bklogpath/$logfilename.log"

# Guard against an empty parse result: GNU date interprets "" as today 00:00,
# which would silently corrupt the range check below.
if [ -n "$opstartmes" ]
then
  oplogRecordFirst=$(date -d "$opstartmes" +%s)
else
  oplogRecordFirst=""
fi

echo "Message --oplog集合记录的开始时间为:" "$oplogRecordFirst" >> "$bklogpath/$logfilename.log"

##begin: the backup window start must lie inside the oplog's recorded range
if [ -n "$oplogRecordFirst" ] && [ "$oplogRecordFirst" -le "$ParamBakStartDate" ]
then
  echo "Message --检查设置备份时间合理。备份参数的开始时间在oplog记录的时间范围内。" >> "$bklogpath/$logfilename.log"
else
  echo "Fatal Error --检查设置的备份时间不合理。备份参数的开始时间不在oplog记录的时间范围内。请调整oplog size或调整备份频率。本次备份可以持续进行，但还原时数据完整性丢失。" >> "$bklogpath/$logfilename.log"
fi

##end##

#### comments1 end  ####

## Dump the [ParamBakStartDate, ParamBakEndDate] window from local.oplog.rs.
## mongodump's own output is appended to the log file so the exported document
## count can be used to gauge per-cycle write activity and estimate the time a
## restore would take.

${mongo_path}/mongodump -h localhost --port "$port" --authenticationDatabase admin -u"$username" -p"$password" -d local -c oplog.rs --query '{ts:{$gte:Timestamp('"$ParamBakStartDate"',1),$lte:Timestamp('"$ParamBakEndDate"',9999)}}' -o "$bkdatapath/mongodboplog$bkfilename" >> "$bklogpath/$logfilename.log" 2>&1
dump_rc=$?

# The original script ignored mongodump's exit status; log failures explicitly
# so the directory-existence check below is not the only failure signal.
if [ "$dump_rc" -ne 0 ]
then
  echo "Fatal Error --mongodump 执行失败，退出码为 $dump_rc，请检查上方日志输出。" >> "$bklogpath/$logfilename.log"
fi

## 调整结束

#### comments2 start: re-check after the dump.  The oplog is a capped
#### collection: heavy write traffic during the export can overwrite entries
#### that had not been dumped yet, silently truncating the exported window.
#### The oldest remaining entry must still predate the 61-minute threshold
#### (ParamAfterBakRequestStartDate) to guarantee continuity between files. ####
opmes=$(/bin/echo "db.printReplicationInfo()" | $command_line --quiet)
echo $opmes > opdoctime$port.tmplog
opbktmplogfile=opdoctime$port.tmplog
opstartmes=$(grep "oplog first event time" $opbktmplogfile | awk -F 'CST' '{print $1}' | awk -F 'oplog first event time: '  '{print $2}' | awk -F ' GMT' '{print $1}'  )
echo "Message --执行备份后,oplog集合记录的开始时间为："$opstartmes >> "$bklogpath/$logfilename.log"
# Guard against an empty parse result (same rationale as the pre-dump check):
# GNU date treats "" as today 00:00, which would corrupt the comparison below.
if [ -n "$opstartmes" ]
then
  oplogRecordFirst=$(date -d "$opstartmes" +%s)
else
  oplogRecordFirst=""
fi
echo "Message --执行备份后,oplog集合记录的开始时间为[时间格式化]:" "$oplogRecordFirst" >> "$bklogpath/$logfilename.log"
##begin: oldest surviving oplog entry must predate the 61-minute threshold
if [ -n "$oplogRecordFirst" ] && [ "$oplogRecordFirst" -le "$ParamAfterBakRequestStartDate" ]
then
  echo "Message --备份后，检查oplog集合中数据的开始时间，即集合中最早的一笔数据，时间不小于61分钟的时间（即参数 ParamAfterBakRequestStartDate）。这样可以保证每个增量备份含有最近一个小时的全部op操作，满足文件的持续完整性，逐个还原无丢失数据风险。" >> "$bklogpath/$logfilename.log"
else
  echo "Fatal Error --备份后，检查oplog集合的涵盖的时间范围过小（小于61min）。设置的备份时间不合理，备份后的文件不能完全涵盖最近60分钟的数据。请调整oplog size或调整备份频率。本次备份可以持续进行，但还原时数据完整性丢失。" >> "$bklogpath/$logfilename.log"
fi
#### comments2 end ####


#### comments3 start: verify the dump directory was actually created ####
# All expansions quoted (SC2086) so the test and redirects are robust.
if [ -d "$bkdatapath/mongodboplog$bkfilename" ]
then
  echo "Message --检查此次备份文件已经产生.文件信息为:" "$bkdatapath/mongodboplog$bkfilename" >> "$bklogpath/$logfilename.log"
else
  echo "Fatal Error --备份过程已执行，但是未检测到备份产生的文件，请检查！" >> "$bklogpath/$logfilename.log"
fi
##### comments3 end ####

#### comments4 start: purge history, keeping 3 days of backups ####
# BUG FIX: the original appended a literal '*' to the variable
# (keepbaktime=$(date ...)* ) and relied on unquoted glob expansion inside
# [ -d ]; when more than one directory matched, `[` received multiple
# operands, failed with "binary operator expected", and nothing was deleted.
# Iterate over the glob matches instead so every directory from that hour is
# removed and each deletion is logged with its real path.
keepbaktime=$(date -d '-3 days' "+%Y%m%d%H")
for olddir in "$bkdatapath"/mongodboplog"$keepbaktime"*
do
  # With an unmatched glob the literal pattern remains; -d filters it out.
  if [ -d "$olddir" ]
  then
    rm -rf "$olddir"
    echo "Message -- $olddir 删除完毕" >> "$bklogpath/$logfilename.log"
  fi
done
### comments4 end 


# Final marker: append the completion timestamp of this run to the daily log.
echo "============================Message --MongoDB 端口为" $port "的差异备份结束，结束时间为：" $(date -d today +"%Y%m%d%H%M%S") >> $bklogpath/$logfilename.log
